Multivariate Covariance Generalized Linear Models for the Analysis of Experimental Data

github.com/leg-ufpr/mcglm4aed

1 Data description and objectives

#-----------------------------------------------------------------------
# Packages.

rm(list = ls())
library(lattice)
library(latticeExtra)
library(car)
library(candisc)
library(corrplot)

2 Petal measures

#-----------------------------------------------------------------------

scatterplotMatrix(~Sepal.Length + Sepal.Width +
                      Petal.Length + Petal.Width | Species,
                  data = iris,
                  smooth = FALSE,
                  reg.line = FALSE,
                  ellipse = TRUE,
                  by.groups = TRUE,
                  gap = 0,
                  diagonal = "density")

#-----------------------------------------------------------------------
# Petal.Length & Petal.Width.

scatterplotMatrix(~Petal.Length + Petal.Width | Species,
                  data = iris,
                  smooth = FALSE,
                  reg.line = FALSE,
                  ellipse = TRUE,
                  by.groups = TRUE,
                  gap = 0,
                  diagonal = "density")

#-----------------------------------------------------------------------
# MLM fitting.

# Iris full model.
m1 <- lm(cbind(Petal.Length,
               Petal.Width) ~ Species,
         data = iris)

# Iris null model.
m0 <- update(m1, . ~ 1)

# MANOVA with Pillai test.
anova(m1)
## Analysis of Variance Table
## 
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.98786   5939.2      2    146 < 2.2e-16 ***
## Species       2 1.04645     80.7      4    294 < 2.2e-16 ***
## Residuals   147                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract the raw residuals.
r <- residuals(m1)

# Checking the models assumptions on the residuals.
scatterplotMatrix(r,
                  gap = 0,
                  smooth = FALSE,
                  reg.line = FALSE,
                  ellipse = TRUE,
                  diagonal = "qqplot")

#-----------------------------------------------------------------------
# Doing the canonical analysis using eigen decomposition.

# Error SSP of the full and null models.
S_full <- crossprod(residuals(m1))
S_null <- crossprod(residuals(m0))

# Extra SSP due the hypothesis.
S_extr <- S_null - S_full

# Eigen decomposition -> canonical analysis.
ei <- eigen(solve(S_full, S_extr))
ei
## eigen() decomposition
## $values
## [1] 19.6773042  0.1047462
## 
## $vectors
##           [,1]       [,2]
## [1,] 0.5407512 -0.3939360
## [2,] 0.8411826  0.9191379
# Cumulated proportion.
cumsum(ei$values)/sum(ei$values)
## [1] 0.994705 1.000000
# Scores (canonical variables).
scores <- as.matrix(m1$model[1]) %*% ei$vectors[, 1:2]

# Plot of the scores.
xyplot(scores[, 2] ~ scores[, 1],
       groups = iris$Species,
       grid = TRUE,
       auto.key = list(columns = 3,
                       title = "Species",
                       cex.title = 1),
       xlab = "First canonical dimension",
       ylab = "Second canonical dimension",
       aspect = "iso") +
    layer(panel.ellipse(...))

#-----------------------------------------------------------------------
# Using the `candisc` package.

c1 <- candisc(m1, term = "Species")
str(c1)
## List of 15
##  $ dfh        : num 2
##  $ dfe        : int 147
##  $ eigenvalues: num [1:2] 19.677 0.105
##  $ canrsq     : num [1:2] 0.9516 0.0948
##  $ pct        : num [1:2] 99.47 0.53
##  $ rank       : num 2
##  $ ndim       : num 2
##  $ means      :'data.frame': 3 obs. of  2 variables:
##   ..$ Can1: num [1:3] -5.84 1.08 4.76
##   ..$ Can2: num [1:3] 0.155 -0.446 0.291
##  $ factors    :'data.frame': 150 obs. of  1 variable:
##   ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ term       : chr "Species"
##  $ terms      : chr "Species"
##  $ coeffs.raw : num [1:2, 1:2] 1.54 2.4 -2.16 5.04
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "Petal.Length" "Petal.Width"
##   .. ..$ : chr [1:2] "Can1" "Can2"
##  $ coeffs.std : num [1:2, 1:2] 0.665 0.492 -0.93 1.032
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "Petal.Length" "Petal.Width"
##   .. ..$ : chr [1:2] "Can1" "Can2"
##  $ structure  : num [1:2, 1:2] 0.994 0.987 -0.109 0.163
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "Petal.Length" "Petal.Width"
##   .. ..$ : chr [1:2] "Can1" "Can2"
##  $ scores     :'data.frame': 150 obs. of  3 variables:
##   ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ Can1   : num [1:150] -6.04 -6.04 -6.2 -5.89 -6.04 ...
##   ..$ Can2   : num [1:150] 0.0569 0.0569 0.273 -0.1592 0.0569 ...
##  - attr(*, "class")= chr "candisc"
summary(c1)
## 
## Canonical Discriminant Analysis for Species:
## 
##     CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.951638   19.67730     19.573 99.4705      99.47
## 2 0.094815    0.10475     19.573  0.5295     100.00
## 
## Class means:
## 
##               Can1     Can2
## setosa     -5.8362  0.15489
## versicolor  1.0796 -0.44620
## virginica   4.7566  0.29132
## 
##  std coefficients:
##                 Can1     Can2
## Petal.Length 0.66460 -0.93005
## Petal.Width  0.49165  1.03197
# The canonical scores.
c("my eigen" = xyplot(scores[, 2] ~ scores[, 1],
                      groups = iris$Species,
                      grid = TRUE,
                      auto.key = list(columns = 3,
                                      title = "Species",
                                      cex.title = 1),
                      xlab = "First canonical dimension",
                      ylab = "Second canonical dimension"),
  "candisc" = xyplot(Can2 ~ Can1,
                     groups = Species,
                     data = c1$scores,
                     grid = TRUE,
                     auto.key = list(columns = 3,
                                     title = "Species",
                                     cex.title = 1),
                     xlab = "First canonical dimension",
                     ylab = "Second canonical dimension"),
  layout = c(1, NA))
## Warning in formals(fun): argument is not a function

ei$vectors[, 1:2]
##           [,1]       [,2]
## [1,] 0.5407512 -0.3939360
## [2,] 0.8411826  0.9191379
c1$coeffs.raw
##                  Can1      Can2
## Petal.Length 1.544371 -2.161222
## Petal.Width  2.402394  5.042599
c1$coeffs.raw/ei$vectors[, 1:2]
##                  Can1     Can2
## Petal.Length 2.855973 5.486227
## Petal.Width  2.855973 5.486227
# The plot() method for `candisc` objects.
plot(c1, asp = 1)
## Vector scale factor set to 7.709
grid()

# ATTENTION: upened issue! Study the plot.candisc() function to see
# wheather they scale the vectors.
# getS3method(f = "plot", class = "candisc")

# Scores: canonical variates.
xy_can <-
    xyplot(Can2 ~ Can1,
           groups = Species,
           data = c1$scores,
           xlab = NULL,
           ylab = NULL,
           aspect = "iso",
           auto.key = list(columns = 3),
           grid = TRUE) +
    layer(panel.ellipse(...)) +
    layer(panel.arrows(0, 0,
                       c1$coeffs.raw[, 1],
                       c1$coeffs.raw[, 2],
                       length = 0.1))

xy_ori <-
    xyplot(Petal.Length ~ Petal.Width,
           groups = Species,
           aspect = "iso",
           data = iris,
           grid = TRUE) +
    layer(panel.ellipse(...))

c("Canonical" = xy_can,
  "Original" = xy_ori,
  layout = c(1, 2))
## Warning in formals(fun): argument is not a function

#--------------------------------------------
# Univariate analysis of each canical variate.

an0 <- lm(scores ~ Species, data = iris)
summary(an0)
## Response Y1 :
## 
## Call:
## lm(formula = Y1 ~ Species, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.87145 -0.16373 -0.01604  0.21182  0.95942 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.99751    0.04952   20.14   <2e-16 ***
## Speciesversicolor  2.42150    0.07003   34.58   <2e-16 ***
## Speciesvirginica   3.70898    0.07003   52.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3501 on 147 degrees of freedom
## Multiple R-squared:  0.9516, Adjusted R-squared:  0.951 
## F-statistic:  1446 on 2 and 147 DF,  p-value: < 2.2e-16
## 
## 
## Response Y2 :
## 
## Call:
## lm(formula = Y2 ~ Species, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59429 -0.09219  0.01129  0.08817  0.52182 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.34983    0.02578 -13.571  < 2e-16 ***
## Speciesversicolor -0.10956    0.03645  -3.005  0.00312 ** 
## Speciesvirginica   0.02487    0.03645   0.682  0.49623    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1823 on 147 degrees of freedom
## Multiple R-squared:  0.09481,    Adjusted R-squared:  0.0825 
## F-statistic: 7.699 on 2 and 147 DF,  p-value: 0.000661

3 Sepal measures

#-----------------------------------------------------------------------
# Sepal.Length & Sepal.Width.

scatterplotMatrix(~Sepal.Length + Sepal.Width | Species,
                  data = iris,
                  smooth = FALSE,
                  reg.line = FALSE,
                  ellipse = TRUE,
                  by.groups = TRUE,
                  gap = 0,
                  diagonal = "density")

# Iris full model.
m1 <- lm(cbind(Sepal.Length,
               Sepal.Width) ~ Species,
         data = iris)

# Iris null model.
m0 <- update(m1, . ~ 1)

# MANOVA with Pillai test.
anova(m1)
## Analysis of Variance Table
## 
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.99311  10518.8      2    146 < 2.2e-16 ***
## Species       2 0.94531     65.9      4    294 < 2.2e-16 ***
## Residuals   147                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract the raw residuals.
r <- residuals(m1)

# Checking the models assumptions on the residuals.
scatterplotMatrix(r,
                  gap = 0,
                  smooth = FALSE,
                  reg.line = FALSE,
                  ellipse = TRUE,
                  diagonal = "qqplot")

#-----------------------------------------------------------------------
# Doing the canonical analysis using eigen decomposition.

# Error SSP of the full and null models.
S_full <- crossprod(residuals(m1))
S_null <- crossprod(residuals(m0))

# Extra SSP due the hypothesis.
S_extr <- S_null - S_full

# Eigen decomposition -> canonical analysis.
ei <- eigen(solve(S_full, S_extr))
ei
## eigen() decomposition
## $values
## [1] 4.1717987 0.1609957
## 
## $vectors
##            [,1]      [,2]
## [1,]  0.6118383 0.3624970
## [2,] -0.7909829 0.9319849
# Cumulated proportion.
cumsum(ei$values)/sum(ei$values)
## [1] 0.9628425 1.0000000
# Scores (canonical variables).
scores <- as.matrix(m1$model[1]) %*% ei$vectors[, 1:2]

# Plot of the scores.
xyplot(scores[, 2] ~ scores[, 1],
       groups = iris$Species,
       grid = TRUE,
       auto.key = list(columns = 3,
                       title = "Species",
                       cex.title = 1),
       xlab = "First canonical dimension",
       ylab = "Second canonical dimension",
       aspect = "iso") +
    layer(panel.ellipse(...))

#-----------------------------------------------------------------------
# Using the `candisc` package.

c1 <- candisc(m1, term = "Species")
str(c1)
## List of 15
##  $ dfh        : num 2
##  $ dfe        : int 147
##  $ eigenvalues: num [1:2] 4.172 0.161
##  $ canrsq     : num [1:2] 0.807 0.139
##  $ pct        : num [1:2] 96.28 3.72
##  $ rank       : num 2
##  $ ndim       : num 2
##  $ means      :'data.frame': 3 obs. of  2 variables:
##   ..$ Can1: num [1:3] -2.819 0.994 1.825
##   ..$ Can2: num [1:3] 0.0943 -0.5267 0.4324
##  $ factors    :'data.frame': 150 obs. of  1 variable:
##   ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ term       : chr "Species"
##  $ terms      : chr "Species"
##  $ coeffs.raw : num [1:2, 1:2] 2.141 -2.768 0.815 2.096
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "Sepal.Length" "Sepal.Width"
##   .. ..$ : chr [1:2] "Can1" "Can2"
##  $ coeffs.std : num [1:2, 1:2] 1.102 -0.94 0.42 0.712
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "Sepal.Length" "Sepal.Width"
##   .. ..$ : chr [1:2] "Can1" "Can2"
##  $ structure  : num [1:2, 1:2] 0.848 -0.626 0.53 0.779
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "Sepal.Length" "Sepal.Width"
##   .. ..$ : chr [1:2] "Can1" "Can2"
##  $ scores     :'data.frame': 150 obs. of  3 variables:
##   ..$ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ Can1   : num [1:150] -2.82 -1.86 -2.84 -2.78 -3.31 ...
##   ..$ Can2   : num [1:150] 0.322 -0.889 -0.633 -0.924 0.45 ...
##  - attr(*, "class")= chr "candisc"
summary(c1)
## 
## Canonical Discriminant Analysis for Species:
## 
##    CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.80664     4.1718     4.0108 96.2843     96.284
## 2 0.13867     0.1610     4.0108  3.7157    100.000
## 
## Class means:
## 
##                Can1      Can2
## setosa     -2.81893  0.094291
## versicolor  0.99379 -0.526724
## virginica   1.82514  0.432433
## 
##  std coefficients:
##                  Can1    Can2
## Sepal.Length  1.10226 0.41969
## Sepal.Width  -0.94029 0.71201
# The canonical scores.
c("my eigen" = xyplot(scores[, 2] ~ scores[, 1],
                      groups = iris$Species,
                      grid = TRUE,
                      auto.key = list(columns = 3,
                                      title = "Species",
                                      cex.title = 1),
                      xlab = "First canonical dimension",
                      ylab = "Second canonical dimension"),
  "candisc" = xyplot(Can2 ~ Can1,
                     groups = Species,
                     data = c1$scores,
                     grid = TRUE,
                     auto.key = list(columns = 3,
                                     title = "Species",
                                     cex.title = 1),
                     xlab = "First canonical dimension",
                     ylab = "Second canonical dimension"),
  layout = c(1, NA))
## Warning in formals(fun): argument is not a function

ei$vectors[, 1:2]
##            [,1]      [,2]
## [1,]  0.6118383 0.3624970
## [2,] -0.7909829 0.9319849
c1$coeffs.raw
##                   Can1      Can2
## Sepal.Length  2.141178 0.8152721
## Sepal.Width  -2.768109 2.0960764
c1$coeffs.raw/ei$vectors[, 1:2]
##                  Can1     Can2
## Sepal.Length 3.499582 2.249045
## Sepal.Width  3.499582 2.249045
# The plot() method for `candisc` objects.
plot(c1, asp = 1)
## Vector scale factor set to 5.805
grid()

# ATTENTION: upened issue! Study the plot.candisc() function to see
# wheather they scale the vectors.
# getS3method(f = "plot", class = "candisc")

# Scores: canonical variates.
xy_can <-
    xyplot(Can2 ~ Can1,
           groups = Species,
           data = c1$scores,
           xlab = NULL,
           ylab = NULL,
           aspect = "iso",
           auto.key = list(columns = 3),
           grid = TRUE) +
    layer(panel.ellipse(...)) +
    layer(panel.arrows(0, 0,
                       c1$coeffs.raw[, 1],
                       c1$coeffs.raw[, 2],
                       length = 0.1))

xy_ori <-
    xyplot(Petal.Length ~ Petal.Width,
           groups = Species,
           aspect = "iso",
           data = iris,
           grid = TRUE) +
    layer(panel.ellipse(...))

c("Canonical" = xy_can,
  "Original" = xy_ori,
  layout = c(1, 2))
## Warning in formals(fun): argument is not a function

#--------------------------------------------
# Univariate analysis of each canical variate.

an0 <- lm(scores ~ Species, data = iris)
summary(an0)
## Response Y1 :
## 
## Call:
## lm(formula = Y1 ~ Species, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65786 -0.19016 -0.00481  0.15641  0.97619 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.35137    0.04041   8.695 6.32e-15 ***
## Speciesversicolor  1.08948    0.05715  19.064  < 2e-16 ***
## Speciesvirginica   1.32703    0.05715  23.220  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2857 on 147 degrees of freedom
## Multiple R-squared:  0.8066, Adjusted R-squared:  0.804 
## F-statistic: 306.6 on 2 and 147 DF,  p-value: < 2.2e-16
## 
## 
## Response Y2 :
## 
## Call:
## lm(formula = Y2 ~ Species, data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23470 -0.32349  0.03992  0.26945  1.24542 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        5.00950    0.06288  79.667  < 2e-16 ***
## Speciesversicolor -0.27612    0.08893  -3.105  0.00228 ** 
## Speciesvirginica   0.15035    0.08893   1.691  0.09301 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4446 on 147 degrees of freedom
## Multiple R-squared:  0.1387, Adjusted R-squared:  0.127 
## F-statistic: 11.83 on 2 and 147 DF,  p-value: 1.718e-05

4 References

5 Session information

# devtools::session_info()
Sys.time()
## [1] "2017-07-26 19:32:54 BRT"
cbind(Sys.info())
##                [,1]                                          
## sysname        "Linux"                                       
## release        "4.4.0-87-generic"                            
## version        "#110-Ubuntu SMP Tue Jul 18 12:55:35 UTC 2017"
## nodename       "class"                                       
## machine        "x86_64"                                      
## login          "unknown"                                     
## user           "walmes"                                      
## effective_user "walmes"
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.2 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
## [1] corrplot_0.77       candisc_0.7-2       heplots_1.3-3      
## [4] car_2.1-4           latticeExtra_0.6-28 RColorBrewer_1.1-2 
## [7] lattice_0.20-35     knitr_1.15.1        rmarkdown_1.3      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.9        magrittr_1.5       splines_3.4.1     
##  [4] MASS_7.3-47        minqa_1.2.4        stringr_1.2.0     
##  [7] tools_3.4.1        parallel_3.4.1     nnet_7.3-12       
## [10] pbkrtest_0.4-6     grid_3.4.1         nlme_3.1-131      
## [13] mgcv_1.8-17        quantreg_5.29      MatrixModels_0.4-1
## [16] htmltools_0.3.5    yaml_2.1.14        lme4_1.1-12       
## [19] rprojroot_1.2      digest_0.6.12      Matrix_1.2-10     
## [22] nloptr_1.0.4       evaluate_0.10      stringi_1.1.2     
## [25] compiler_3.4.1     methods_3.4.1      backports_1.0.5   
## [28] SparseM_1.74